

* +++++++++++++++++++++
* FIGURE A11: 
* MOVERS IN FB VS ADMIN
* +++++++++++++++++++++


* prep admin data 
* +++++++++++++++

* Load the administrative moves file
use "${data_derived}/moves_2016_2017.dta", clear

* Harmonise the label of the youngest age bucket before it becomes part of
* the variable names
replace age_group = "0_17" if age_group == "17oryounger"

* Step 1: go wide over age group. The trailing underscore separates the
* original variable name from the age-group label appended by reshape.
renvars n_moves*, suff(_)
reshape wide n_moves*, i(origin_ags dest_ags year) j(age_group) string

* Remember the variable names as they are before the year gets appended;
* they are the stubs used to build the pooled totals below
unab move_vars : n_moves*

* Step 2: go wide over year
renvars n_moves*, suff(_)
reshape wide n_moves*, i(origin_ags dest_ags) j(year)

* Pool 2016 and 2017 for every series; the sum must never be missing
foreach stub of local move_vars {
	generate `stub'_2016_17 = `stub'_2016 + `stub'_2017
	assert `stub'_2016_17 != .
}

* Mark every moves variable as coming from the admin source
renvars n_moves*, suff(_adm)

* Add up the age buckets within each group so that the totals correspond
* to those we have in fb (rowtotal treats missing components as zero)
local age_buckets 0_17 18_24 25_29 30_49 50_64 65plus
foreach grp in de for m f {
	local parts
	foreach bucket of local age_buckets {
		local parts `parts' n_moves_`grp'_`bucket'_2016_17_adm
	}
	egen n_moves_`grp'_2016_17_adm = rowtotal(`parts')
}

tempfile admin_moves
save `admin_moves'

* prep FB data 
* +++++++++++++++

* Read the FB movers matrix
import delimited "${data_raw}/fb_domestic_migration/movers_matrix_extended.csv", clear

* Turn the border-move indicator into a number. encode numbers the
* categories from 1 upwards, so subtracting 1 shifts the codes to start at 0.
encode border_move, gen(border_move_num)
replace border_move_num = border_move_num - 1
drop border_move
rename border_move_num border_move

* The year is the first four characters of the quarter identifier
generate year = substr(quarter_of_move, 1, 4)
destring year, replace

* Aggregate to origin x destination x population x year: moves are summed,
* the border indicator is averaged
collapse (rawsum) n_moves (mean) border_move, ///
	by(nuts3_origin nuts3_destination population year)

* The averaged indicator must still be exactly 0 or 1 in every cell
assert border_move == 0 | border_move == 1
	
* create balanced panel and impute zeros if row doesn't exist
*
* A skeleton with one row per (origin, destination, population, year) is
* built from the values present in the data and merged back in; cells that
* only exist in the skeleton get n_moves = 0.
* The skeleton works for any number of years and takes the actual year
* values from the data instead of assuming four consecutive years from 2016.
preserve
	* population groups and years present in the data
	qui: levelsof population, local(populations)
	local pops : word count `populations'
	qui: levelsof year, local(year_values)
	local years : word count `year_values'

	* every origin crossed with every origin (used as destination list)
	bys nuts3_origin : keep if _n==1 
	keep nuts3_origin
	tempfile nuts3 
	save `nuts3'
	ren nuts3_origin nuts3_destination
	cross using `nuts3'

	* one row per population x year within each pair, numbered 1..pops*years
	local expand = `pops' * `years'
	expand `expand'
	bys nuts3_origin nuts3_destination : gen n = _n

	* population cycles through the rows: rows i, i+pops, i+2*pops, ...
	* all belong to the i-th population
	gen population =""
	forval i = 1/`pops' {
		local pop : word `i' of `populations'
		replace population = "`pop'" if mod(n - 1, `pops') + 1 == `i'
	}

	* year is constant within consecutive runs of `pops' rows:
	* rows (i-1)*pops+1 .. i*pops get the i-th year (ascending order)
	gen year = .
	forval i = 1/`years' {
		local year : word `i' of `year_values'
		local range_min = `pops' * (`i' - 1)
		local range_max = `pops' * `i'
		replace year = `year' if n > `range_min' & n <= `range_max'
	}
	drop n
	tempfile balanced_data
	save `balanced_data'
restore

* every observed cell must exist in the skeleton (no master-only rows)
merge 1:1 nuts3_origin nuts3_destination population year using `balanced_data', ///
	assert(2 3) keep(2 3) 

* cells that exist only in the skeleton had no recorded moves
replace n_moves = 0 if _merge ==2 
drop _merge

* Rows added by the balancing step have no border indicator yet: copy the
* value observed for the same origin-destination pair (stays missing when
* the pair has no observed row at all)
bysort nuts3_origin nuts3_destination : egen pair_border = max(border_move)
replace border_move = pair_border if border_move == .
drop pair_border

* Moves within the same region are not of interest
keep if nuts3_origin != nuts3_destination

* Build a nuts3 -> ags crosswalk carrying one copy of each code per
* direction, so it can be merged on the origin and on the destination
preserve
	import delimited "${data_raw}/nuts_xw/nuts3_DE_xw.csv", clear
	keep id nuts3
	foreach d in origin destination {
		generate ags_`d' = id
		generate nuts3_`d' = nuts3
	}
	keep ags_origin nuts3_origin ags_destination nuts3_destination
	tempfile cw_ags
	save `cw_ags'
restore

* Attach the ags code of the origin and of the destination; assert(3)
* requires every region on either side of the merge to find a match
merge m:1 nuts3_origin using `cw_ags', nogen assert(3) keepusing(ags_origin)
merge m:1 nuts3_destination using `cw_ags', nogen assert(3) keepusing(ags_destination)

* Use the same key names as the admin data and drop the nuts3 codes
rename ags_origin origin_ags
rename ags_destination dest_ags
drop nuts3*

* make population variable consistent with admin data
* "age18_24" -> "18_24" (strip the "age" prefix)
replace population = substr(population, 4,.) if substr(population, 1,3) =="age" 
* any label containing "male" is reduced to its first letter
replace population = substr(population, 1,1) if strpos(population, "male") !=0 
replace population = "de" if population =="native"
replace population = "for" if population =="non_native"

* reshape wide on population and year 
* The trailing underscore separates the stub from the population label.
* The stub is spelled out in full (n_moves_) so that reshape does not
* depend on variable abbreviation being switched on.
rename n_moves n_moves_
reshape wide n_moves_, i(origin_ags dest_ags year) j(population) string

* remember the per-population names before the year is appended
qui: ds n_moves*
local move_vars `r(varlist)'
renvars n_moves*, suff(_)
reshape wide n_moves*, i(origin_ags dest_ags) j(year)

* create totals over all 2016 + 2017
foreach var in `move_vars' { 
	gen `var'_2016_17 = `var'_2016 + `var'_2017
	assert `var'_2016_17 !=.
}

* mark every moves variable as coming from the FB source
renvars n_moves*, suff(_fb)

tempfile fb_moves
save `fb_moves' 


* calculate ratio: FB users / true population in admin data
* +++++++++++++++

* There are fewer people in FB than there are in the true population.
* To correct for that, the FB move counts used below are inflated by
* dividing them by the ratio of FB users to the true population,
* so we need to calculate that ratio first.

* Administrative population counts by kreis, age and gender
use "${data_derived}/kreis_total_pop_by_age_gender.dta", clear

* Only the 2019 figures are used
keep if year == 2019

* Build the age buckets used in the other data by adding up the finer
* male and female brackets that fall into each bucket
local parts_18_24  18_19 20_24
local parts_30_49  30_34 35_39 40_44 45_49
local parts_50_64  50_54 55_59 60_64
local parts_65plus 65_74 75_plus
foreach bucket in 18_24 30_49 50_64 65plus {
	local components
	foreach sex in m f {
		foreach bracket of local parts_`bucket' {
			local components `components' pop_`sex'_`bracket'
		}
	}
	egen pop_`bucket' = rowtotal(`components')
}

* Germans = total minus foreigners
generate pop_de = pop_tot - pop_for_tot

* Keep the kreis id and the groups needed later.
* NOTE(review): pop_25_29 is not created in this section, so it is
* presumably already present in the input file -- confirm.
keep ags pop_tot pop_de pop_for_tot pop_m_tot pop_f_tot pop_18_24 pop_25_29 ///
	pop_30_49 pop_50_64 pop_65plus

* Group labels consistent with the other data
rename pop_tot     pop_all
rename pop_for_tot pop_for
rename pop_m_tot   pop_m
rename pop_f_tot   pop_f

* Kreis-level copy in which every variable (including ags) exists once with
* an origin_ prefix and once with a dest_ prefix, to be merged on origin
* and destination later
preserve
	unab kreis_vars : _all
	foreach v of local kreis_vars {
		foreach d in origin dest {
			generate `d'_`v' = `v'
		}
	}
	drop `kreis_vars'
	tempfile pop_admin_kreis
	save `pop_admin_kreis'
restore

* Single-row totals over all kreise, tagged as admin figures
collapse (rawsum) pop*

renvars *, suff(_adm)
tempfile pop_admin
save `pop_admin'
	
* FB baseline populations
import delimited "${data_raw}/fb_domestic_migration/baseline_pops.csv", clear

* Recode the population labels so that they agree with the admin data:
* drop the "age" prefix, shorten labels containing "male" to their first
* letter, and map native / non_native to de / for
replace population = substr(population, 4, .) if substr(population, 1, 3) == "age"
replace population = substr(population, 1, 1) if strpos(population, "male") > 0
replace population = "de" if population == "native"
replace population = "for" if population == "non_native"

* Keep the list of population labels in a global for the loops further down
levelsof population, local(populations)
global populations "`populations'"

* Turn the file into a single row with one pop_<label> column per population;
* the constant n only serves as the reshape identifier
rename n pop_
generate n = 1
reshape wide pop_, i(n) j(population) string
drop n

* Remember the untagged names, then tag the columns as FB figures
unab pop_vars : _all
renvars *, suff(_fb)

* Put the single admin totals row next to the single FB row
cross using `pop_admin'

* Share of the admin population that is on FB, per population group;
* each ratio is also printed to the log
foreach v of local pop_vars {
	generate ratio_`v' = `v'_fb / `v'_adm
	quietly summarize ratio_`v'
	display "`v' 		`r(mean)'"
}

tempfile ratios
save `ratios'

* compare admin and FB data
* +++++++++++++++

* Put FB and admin moves side by side; every origin-destination pair has
* to be present in both files
use `fb_moves', clear
merge 1:1 origin_ags dest_ags using `admin_moves', assert(3) nogen

* Attach the (single-row) FB/admin population ratios to every pair
cross using `ratios'

* Scale the FB move counts up by dividing by the FB coverage ratio of the
* respective population group
foreach group of global populations {
	foreach period in 2016_17 2016 2017 2018 2019 {
		local fb_var n_moves_`group'_`period'_fb
		generate `fb_var'_infl = `fb_var' / ratio_pop_`group'
	}
}

* Bring in the admin population of the origin and of the destination kreis
* so that we can do population weights
foreach side in origin dest {
	merge m:1 `side'_ags using `pop_admin_kreis', assert(3) nogen keepusing(`side'*)
}

* Combined population of origin and destination, per population group
foreach group of global populations {
	generate pop_origin_dest_`group' = origin_pop_`group' + dest_pop_`group'
}

* Moves of 18-64 year olds (sum of the four age buckets) and their log,
* once for the inflated FB counts and once for the admin counts
foreach src in fb_infl adm {
	local total n_moves_18_64_2016_17_`src'
	generate `total' = n_moves_18_24_2016_17_`src' + ///
		n_moves_25_29_2016_17_`src' + n_moves_30_49_2016_17_`src' + ///
		n_moves_50_64_2016_17_`src'
	generate l_`total' = ln(`total')
}

local yvar l_n_moves_18_64_2016_17_fb_infl
local xvar l_n_moves_18_64_2016_17_adm

* Weighted correlation, formatted for the figure note
correlate `yvar' `xvar' [w=pop_origin_dest_all]
local corr : di %03.2f `r(rho)'

* Weighted regression slope and its standard error, formatted for the note
regress `yvar' `xvar' [w=pop_origin_dest_all]
local slope : di %03.2f _b[`xvar']
local se : di %03.2f _se[`xvar']

* Binned scatter plot of FB against admin log moves, with both statistics
* reported in the note
binscatter `yvar' `xvar' ///
	[w=pop_origin_dest_all], nq(40) reportreg ///
	xtitle("Log Number of Moves in Admin Data", size(large)) ///
	ytitle("Log Number of Moves in FB Data", size(large)) ///
	note("Correlation = `corr'" "Slope = `slope' (`se')") ///
	ylabel(,nogrid labsize(large)) ///
	mcolor("black") lcolor("blue") xlabel(, labsize(large))
graph export "${output}/binscatter_l_n_moves_18_64_2016_17_fb_infl_vsl_n_moves_18_64_2016_17_adm.png", replace width(3000)
